# Scrape second-hand housing rental listings from the Shangrao real-estate website (srfcwang.com).
import datetime

import requests
import xlwt
from lxml import etree
from xlwt import Workbook

# Crawl the rental listing pages one after another, following the site's
# "next page" link, and store every page on its own worksheet of one workbook.
# The workbook is re-saved after each page so that an interrupted run still
# leaves the pages fetched so far on disk.
book = Workbook(encoding='utf-8')
url = 'http://www.srfcwang.com/rent/list.html'
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
                 'Chrome/69.0.3497.92 Safari/537.36',
    'Referer':'http://www.srfcwang.com/'
    }

# Captions written to row 0 of every sheet, in column order.
COLUMN_TITLES = (
    '标题', '地段', '房源名称', '标签', '户型', '面积', '月租',
    '楼层', '装修情况', '朝向', '房龄', '发布日期', '房源链接',
)

# The row-writing code assumes every listing contributes exactly this many
# consecutive text nodes to the flat "details" list.
DETAILS_PER_LISTING = 6

# (sheet column, offset inside a listing's group of detail text nodes).
# Column 6 is skipped here because the monthly rent comes from its own list.
DETAIL_COLUMNS = ((4, 0), (5, 1), (7, 2), (8, 3), (9, 4), (10, 5))

# Seconds to wait for the server before giving up; without a timeout
# requests would block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Computed once so that a run crossing midnight keeps writing to one file.
# xlwt produces the legacy binary format, hence the ".xls" extension.
output_name = datetime.date.today().isoformat() + '房产出租信息(上饶房产网).xls'

p = 1
while p <= 50:
    # Fetch first: a failed request then raises before an empty sheet is added.
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()  # do not parse an HTTP error page as listings
    xml = etree.HTML(response.text)

    sheetp = book.add_sheet('第' + str(p) + '页')
    for col, caption in enumerate(COLUMN_TITLES):
        sheetp.write(0, col, caption)

    # Flat, page-wide lists; entry i of each list is taken to belong to listing i.
    title_list = xml.xpath('//div[@class="hlist_fl"]/ul/li/a/@title')  # title
    diduan_list = xml.xpath('//div[@class="hlist_fl"]/ul/li/div/p[1]/text()')  # district
    name_list = xml.xpath('//div[@class="hlist_fl"]/ul/li/div/p[1]/a/text()')  # property name
    alldetails_list = xml.xpath('//div[@class="hlist_fl"]/ul/li/div/p[last()-1]/text()')  # detail fields
    time_list = xml.xpath('//div[@class="hlist_fl"]/ul/li/div/p[@class="gray9"]/text()')  # publish date
    money_list = xml.xpath('//div[@class="hlist_fl"]/ul/li/div/span/b/text()')  # monthly rent
    href_list = xml.xpath('//div[@class="hlist_fl"]/ul/li/a/@href')  # listing link

    # A listing can carry several label <span>s, so they are queried per <li>
    # (XPath positions are 1-based) and joined into one cell. One entry is
    # built for every row that will be written, however many listings the
    # page holds.
    biaoqian_list = []
    for i in range(1, len(name_list) + 1):
        label_path = '//div[@class="hlist_fl"]/ul/li[' + str(i) + ']/div/p[2]/span/text()'
        biaoqian_list.append('、'.join(xml.xpath(label_path)))
    print(len(biaoqian_list), len(title_list))

    for i, name in enumerate(name_list):
        row = i + 1  # row 0 holds the captions
        first_detail = DETAILS_PER_LISTING * i
        sheetp.write(row, 0, title_list[i])
        sheetp.write(row, 1, diduan_list[i])
        sheetp.write(row, 2, name)
        sheetp.write(row, 3, biaoqian_list[i])
        sheetp.write(row, 6, money_list[i])
        for col, offset in DETAIL_COLUMNS:
            sheetp.write(row, col, alldetails_list[first_detail + offset])
        sheetp.write(row, 11, time_list[i])
        sheetp.write(row, 12, href_list[i])

    print('已保存第',p,'页')
    book.save(output_name)

    # The last pagination link is "下一页" (next page) while more pages exist.
    # Stop cleanly when the link is absent, has no text, or is something else.
    next_text = xml.xpath('//div[@class="pagination"]/a[last()]/text()')
    next_href = xml.xpath('//div[@class="pagination"]/a[last()]/@href')
    if not next_text or not next_href or next_text[0] != '下一页':
        break
    p += 1
    url = 'http://www.srfcwang.com/rent/' + next_href[0]
